###########################################
#  Primary Divisions: How Voters Evaluate Policy and Group Differences in Intra-Party Contests
#   - Forthcoming at The Journal of Politics
#   - Henderson et al 2021
#
###########################################
#  - code by S. Goggin & J. Henderson
########################################################
# This file produces descriptives for the sophistication measure using the whole common content CCES survey
########################################################

#dirs="~/Dropbox/replication0/"
#dirs should be set here or in runR.R 

# Clear the workspace but keep `dirs` (the directory handed to setwd() below;
# it is set above or in runR.R).
rm(list = setdiff(ls(), 'dirs'))
library(foreign)
library(car)

# Load the CCES 2016 common-content file; the .RData is expected to provide
# an object named `x`, which becomes the working data frame.
setwd(dirs)
load('data/CCES16_Common_OUTPUT_Feb2018_VV.RData')
cces2016 <- x

# 1 when the corresponding validated-vote field is non-missing, else 0.
cces2016$valid_primary <- as.numeric(!is.na(cces2016$CL_E2016PPVM))
cces2016$valid_general <- as.numeric(!is.na(cces2016$CL_E2016GVM))

# Three-point party ID from the 7-point item: Democrats (incl. leaners) = -1,
# Republicans (incl. leaners) = 1, Independent / Not sure = 0, anything else NA.
pid3_recoded <- recode(cces2016$pid7,"'Independent'=0;'Not sure'=0;'Lean Democrat'=-1;'Not very strong Democrat'=-1;'Strong Democrat'=-1;'Lean Republican'=1;'Not very strong Republican'=1;'Strong Republican'=1;else=NA")
cces2016$pid3clean <- as.numeric(as.character(pid3_recoded))

# Party-ID follow-up items (CC16_421_*): treat 'Not Asked' / 'Skipped' as missing.
for (item in c('CC16_421_dem', 'CC16_421_rep', 'CC16_421b')) {
  unanswered <- cces2016[[item]] %in% c('Not Asked', 'Skipped')
  cces2016[[item]][unanswered] <- NA
}

# Relabel the follow-ups so they use the same wording as the pid7 categories
# (gsub() also converts the columns to character).
cces2016$CC16_421_dem <- gsub('so strong', 'very strong', cces2016$CC16_421_dem)
cces2016$CC16_421_rep <- gsub('so strong', 'very strong', cces2016$CC16_421_rep)
leaner_relabel <- c('Neither' = 'Independent',
                    'The Democratic Party' = 'Lean Democrat',
                    'The Republican Party' = 'Lean Republican')
for (old_label in names(leaner_relabel)) {
  cces2016$CC16_421b <- gsub(old_label, leaner_relabel[[old_label]], cces2016$CC16_421b)
}

# Combine the follow-ups into one 7-point measure; a later item overwrites an
# earlier one when both are answered (order: _dem, _rep, then 421b).
pid7post <- rep(NA, nrow(cces2016))
for (item in c('CC16_421_dem', 'CC16_421_rep', 'CC16_421b')) {
  answered <- which(!is.na(cces2016[[item]]))
  pid7post[answered] <- cces2016[[item]][answered]
}
cces2016$pid7post <- pid7post

cces2016$inputstate <- as.character(cces2016$inputstate)

# Working copies of the survey items, stored as character.
# Format: new column name = source CCES variable.
char_copies <- c(
  # knowledge items, incumbents' actual parties, ideological placements
  us_house_majority  = 'CC16_321a',
  us_senate_majority = 'CC16_321b',
  know_gov           = 'CC16_322a',
  know_senate1       = 'CC16_322b',
  know_senate2       = 'CC16_322c',
  know_house         = 'CC16_322d',
  party_gov          = 'CurrentGovParty',
  party_senate1      = 'CurrentSen1Party',
  party_senate2      = 'CurrentSen2Party',
  party_house        = 'CurrentHouseParty',
  dem_place          = 'CC16_340g',
  rep_place          = 'CC16_340h',
  self_place         = 'CC16_340a',
  self_place_attempt = 'CC16_340a',
  # issue batteries
  guns_policy1 = 'CC16_330a', guns_policy2 = 'CC16_330b',
  guns_policy3 = 'CC16_330d', guns_policy4 = 'CC16_330e',
  immi_policy1 = 'CC16_331_1', immi_policy2 = 'CC16_331_2', immi_policy3 = 'CC16_331_3',
  immi_policy4 = 'CC16_331_4', immi_policy5 = 'CC16_331_5', immi_policy6 = 'CC16_331_6',
  immi_policy7 = 'CC16_331_7', immi_policy8 = 'CC16_331_8', immi_policy9 = 'CC16_331_9',
  abrt_policy1 = 'CC16_332a', abrt_policy2 = 'CC16_332b', abrt_policy3 = 'CC16_332c',
  abrt_policy4 = 'CC16_332d', abrt_policy5 = 'CC16_332e', abrt_policy6 = 'CC16_332f',
  envs_policy1 = 'CC16_333a', envs_policy2 = 'CC16_333b',
  envs_policy3 = 'CC16_333c', envs_policy4 = 'CC16_333d',
  crme_policy1 = 'CC16_334a', crme_policy2 = 'CC16_334b',
  crme_policy3 = 'CC16_334c', crme_policy4 = 'CC16_334d',
  gays_policy1 = 'CC16_335',
  budg_policy1 = 'CC16_337_1', budg_policy2 = 'CC16_337_2', budg_policy3 = 'CC16_337_3',
  # roll-call items
  roll_garland = 'CC16_351A',
  roll_tpp_act = 'CC16_351B',
  roll_trd_adj = 'CC16_351D',
  roll_usa_fre = 'CC16_351C',
  roll_iransct = 'CC16_351G',
  roll_educrfr = 'CC16_351E',
  roll_infrast = 'CC16_351F',
  roll_medicar = 'CC16_351H',
  roll_rpl_aca = 'CC16_351I',
  roll_minwage = 'CC16_351K'
)
for (new_col in names(char_copies)) {
  # exact = FALSE reproduces the partial name matching that `$` performs
  source_col <- cces2016[[char_copies[[new_col]], exact = FALSE]]
  cces2016[[new_col]] <- as.character(source_col)
}
# more covars here




# Recode the issue and roll-call items to 0/1 indicators.

# Items asked of only part of the sample: remember which rows carry a label
# containing 'Asked' (must be captured before the labels are overwritten).
not_asked_items <- c('immi_policy6', 'immi_policy4', 'immi_policy5', 'immi_policy8',
                     'roll_garland', 'roll_usa_fre', 'roll_trd_adj')
not_asked_rows <- lapply(cces2016[not_asked_items], function(labels) grep('Asked', labels))

# Substring-keyed items: 1 when the response label contains the keyword, else 0.
# grepl() is FALSE for missing labels, so missing responses are coded 0 here.
# (roll_medicar is keyed on 'For'; an 'Against' keying was tried and dropped.)
substring_key <- c(
  envs_policy1 = 'Oppose', envs_policy2 = 'Oppose', envs_policy3 = 'Oppose', envs_policy4 = 'Oppose',
  crme_policy1 = 'Oppose', crme_policy2 = 'Oppose', crme_policy3 = 'Support', crme_policy4 = 'Support',
  abrt_policy1 = 'Oppose', abrt_policy2 = 'Support', abrt_policy3 = 'Support',
  abrt_policy4 = 'Support', abrt_policy5 = 'Support', abrt_policy6 = 'Support',
  gays_policy1 = 'Oppose',
  guns_policy1 = 'Oppose', guns_policy3 = 'Oppose', guns_policy2 = 'Support', guns_policy4 = 'Support',
  roll_garland = 'For', roll_usa_fre = 'For', roll_iransct = 'For',
  roll_medicar = 'For', roll_rpl_aca = 'For',
  roll_trd_adj = 'Against', roll_tpp_act = 'Against', roll_infrast = 'Against',
  roll_educrfr = 'Against', roll_minwage = 'Against'
)
for (item in names(substring_key)) {
  cces2016[[item]] <- as.numeric(grepl(substring_key[[item]], cces2016[[item]]))
}

# Exact-match items: 1 when the label equals the keyed answer, 0 otherwise;
# a missing label stays NA because `==` propagates NA.
exact_key <- c(
  immi_policy1 = 'No', immi_policy3 = 'No', immi_policy6 = 'No',
  immi_policy2 = 'Yes', immi_policy7 = 'Yes',
  immi_policy4 = 'Yes', immi_policy5 = 'Yes', immi_policy8 = 'Yes'
)
for (item in names(exact_key)) {
  cces2016[[item]] <- as.numeric(cces2016[[item]] == exact_key[[item]])
}

# Rows whose label contained 'Asked' become missing rather than 0.
for (item in not_asked_items) {
  cces2016[[item]][not_asked_rows[[item]]] <- NA
}
# `na` previously ended up holding the roll_trd_adj row indices; keep that state.
na <- not_asked_rows[['roll_trd_adj']]

# 1 if the respondent attempted a self-placement, 0 if the label contains
# 'Not sure' or 'Skipped'. Must run before self_place is recoded below.
no_attempt <- grepl('Not sure', cces2016$self_place) | grepl('Skipped', cces2016$self_place)
cces2016$self_place_attempt <- as.numeric(!no_attempt)

# Convert 7-point ideology labels to a -3..3 score.
# Patterns are applied in sequence on the label text: the qualified labels
# ('Very ...', 'Somewhat ...') are replaced first so the bare 'Liberal' /
# 'Conservative' patterns only hit what is left. 'Middle', 'Not sure' and
# 'Skipped' all map to 0. Labels matching no pattern go through as.numeric()
# unchanged (NA unless the label itself is numeric text).
ideology_to_score <- function(labels) {
  patterns <- c('Very Liberal', 'Somewhat Liberal', 'Liberal',
                'Middle', 'Not sure', 'Skipped',
                'Very Conservative', 'Somewhat Conservative', 'Conservative')
  scores <- c(-3, -1, -2,
              0, 0, 0,
              3, 1, 2)
  for (step in seq_along(patterns)) {
    labels[grep(patterns[step], labels)] <- scores[step]
  }
  as.numeric(labels)
}

cces2016$self_place <- ideology_to_score(cces2016$self_place)
cces2016$dem_place <- ideology_to_score(cces2016$dem_place)
cces2016$rep_place <- ideology_to_score(cces2016$rep_place)




# Additive sophistication score: one point each for
#   - naming 'Republicans' as the House majority and as the Senate majority,
#   - giving the governor / senator 1 / senator 2 / House member a party that
#     matches the recorded incumbent party (substring match on the labels),
#   - placing the Democratic Party at or to the left of the Republican Party.
# 1 when the respondent's answer and the recorded party contain the same label.
names_incumbent_party <- function(answer, truth) {
  as.numeric(grepl('Republican', answer)  & grepl('Republican', truth)) +
    as.numeric(grepl('Democrat', answer)    & grepl('Democrat', truth)) +
    as.numeric(grepl('Independent', answer) & grepl('Independent', truth))
}

knows_majorities <- grepl('Republicans', cces2016$us_house_majority) +
  grepl('Republicans', cces2016$us_senate_majority)

# NOTE(review): `<=` also credits ties (e.g. both parties coded 0), whereas the
# knowledge battery built later in this file uses a strict `<` -- confirm intent.
orders_parties <- as.numeric(cces2016$dem_place <= cces2016$rep_place)

cces2016$sophistication <- knows_majorities +
  names_incumbent_party(cces2016$know_gov, cces2016$party_gov) +
  names_incumbent_party(cces2016$know_senate1, cces2016$party_senate1) +
  names_incumbent_party(cces2016$know_senate2, cces2016$party_senate2) +
  names_incumbent_party(cces2016$know_house, cces2016$party_house) +
  orders_parties
# News interest on a 0-4 scale: 4 = 'Most of the time' ... 1 = 'Hardly at all',
# 0 = "Don't know" / 'Skipped'. Labels are matched as substrings.
#
# Convert to character first, as is done for the other recoded items. If
# `newsint` arrives as a factor, assigning numbers that are not among its levels
# produces NA ("invalid factor level" warnings) and the final as.numeric()
# would return level codes instead of the intended scores. For a column that
# is already character this conversion is a no-op.
cces2016$newsint <- as.character(cces2016$newsint)
newsint_scores <- c('Most of the time' = 4,
                    'Some of the time' = 3,
                    'Only now and then' = 2,
                    'Hardly at all' = 1,
                    "Don't know" = 0,
                    'Skipped' = 0)
for (label in names(newsint_scores)) {
  cces2016$newsint[grep(label, cces2016$newsint)] <- newsint_scores[[label]]
}
# Any label not listed above becomes NA here (with a coercion warning).
cces2016$newsint <- as.numeric(cces2016$newsint)


# Budget-priority rankings: strip the 'Ranked ' prefix, code 'Skipped' as the
# middle rank (2), then convert to a number.
# NOTE(review): as.numeric(as.factor()) returns factor level positions; these
# equal the ranks only if the labels present are exactly 1, 2, 3 -- confirm.
budget_rank <- function(raw) {
  rank_label <- gsub('Ranked ', '', raw)
  rank_label[grep('Skipped', rank_label)] <- 2
  as.numeric(as.factor(rank_label))
}

cces2016$budg_policy1 <- budget_rank(cces2016$budg_policy1) # Budget Priorities - Cut Defense Spending
cces2016$budg_policy2 <- budget_rank(cces2016$budg_policy2) # Budget Priorities - Cut Domestic Spending
cces2016$budg_policy3 <- budget_rank(cces2016$budg_policy3) # Budget Priorities - Raise Taxes

# Indicators built from the ranks (1 = coded first, 3 = coded last).
ranked_first <- function(rank) as.numeric(rank == 1)
ranked_last  <- function(rank) as.numeric(rank == 3)

cces2016$taxs_policy1 <- 1 - ranked_first(cces2016$budg_policy3)
cces2016$taxs_policy2 <- ranked_last(cces2016$budg_policy3)

cces2016$need_policy1 <- ranked_first(cces2016$budg_policy2)
cces2016$need_policy2 <- 1 - ranked_last(cces2016$budg_policy2)

# dfns_policy1 is the un-reversed indicator (a 1 - x version was tried and dropped).
cces2016$dfns_policy1 <- ranked_first(cces2016$budg_policy1)
cces2016$dfns_policy2 <- ranked_last(cces2016$budg_policy1)
# missing defense rankings count as 0 for dfns_policy2
cces2016$dfns_policy2[is.na(cces2016$dfns_policy2)] <- 0
# verify ideological orientation of items?
#lm(cces2016$self_place~cces2016$envs_policy1)$coef[2]>0
#lm(cces2016$self_place~cces2016$envs_policy2)$coef[2]>0
#lm(cces2016$self_place~cces2016$envs_policy3)$coef[2]>0
#lm(cces2016$self_place~cces2016$envs_policy4)$coef[2]>0

#lm(cces2016$self_place~cces2016$crme_policy1)$coef[2]>0
#lm(cces2016$self_place~cces2016$crme_policy2)$coef[2]>0
#lm(cces2016$self_place~cces2016$crme_policy3)$coef[2]>0
#lm(cces2016$self_place~cces2016$crme_policy4)$coef[2]>0

#lm(cces2016$self_place~cces2016$abrt_policy1)$coef[2]>0
#lm(cces2016$self_place~cces2016$abrt_policy2)$coef[2]>0
#lm(cces2016$self_place~cces2016$abrt_policy3)$coef[2]>0
#lm(cces2016$self_place~cces2016$abrt_policy4)$coef[2]>0
#lm(cces2016$self_place~cces2016$abrt_policy5)$coef[2]>0
#lm(cces2016$self_place~cces2016$abrt_policy6)$coef[2]>0

#lm(cces2016$self_place~cces2016$gays_policy1)$coef[2]>0

#lm(cces2016$self_place~cces2016$immi_policy1)$coef[2]>0
#lm(cces2016$self_place~cces2016$immi_policy2)$coef[2]>0
#lm(cces2016$self_place~cces2016$immi_policy3)$coef[2]>0
#lm(cces2016$self_place~cces2016$immi_policy4)$coef[2]>0
#lm(cces2016$self_place~cces2016$immi_policy5)$coef[2]>0
#lm(cces2016$self_place~cces2016$immi_policy6)$coef[2]>0
#lm(cces2016$self_place~cces2016$immi_policy7)$coef[2]>0
#lm(cces2016$self_place~cces2016$immi_policy8)$coef[2]>0

#lm(cces2016$self_place~cces2016$roll_tpp_act)$coef[2]>0
#lm(cces2016$self_place~cces2016$roll_trd_adj)$coef[2]>0

#lm(cces2016$self_place~cces2016$guns_policy1)$coef[2]>0
#lm(cces2016$self_place~cces2016$guns_policy2)$coef[2]>0
#lm(cces2016$self_place~cces2016$guns_policy3)$coef[2]>0
#lm(cces2016$self_place~cces2016$guns_policy4)$coef[2]>0

# Issue-area indices: the mean of the 0/1 items in an area, rescaled from
# [0, 1] to [-1, 1] via (mean - .5) / .5. Any missing item makes the index NA.
libcon_scale <- function(items) {
  item_total <- Reduce(`+`, cces2016[items])
  ((item_total / length(items)) - .5) / .5
}

cces2016$libcon_envs <- libcon_scale(paste0('envs_policy', 1:4))
cces2016$libcon_crme <- libcon_scale(paste0('crme_policy', 1:4))
cces2016$libcon_abrt <- libcon_scale(paste0('abrt_policy', 1:6))
cces2016$libcon_gays <- libcon_scale('gays_policy1')
cces2016$libcon_trad <- libcon_scale(c('roll_tpp_act', 'roll_trd_adj'))
cces2016$libcon_guns <- libcon_scale(paste0('guns_policy', 1:4))

# immigration uses items 1, 2, 3 and 7 only
cces2016$libcon_immi <- libcon_scale(paste0('immi_policy', c(1, 2, 3, 7)))

# defense: budget item dfns_policy2 plus two roll calls
# (dfns_policy1 is left out; budget-only and roll-call-only variants were also tried)
cces2016$libcon_dfns <- libcon_scale(c('dfns_policy2', 'roll_usa_fre', 'roll_iransct'))

# taxes/spending: the five roll-call items only (taxs_policy1/2 are left out)
cces2016$libcon_taxs <- libcon_scale(c('roll_educrfr', 'roll_infrast', 'roll_medicar',
                                       'roll_rpl_aca', 'roll_minwage'))

# need: three roll-call items only (need_policy1/2, roll_educrfr and
# roll_infrast are left out of this version)
cces2016$libcon_need <- libcon_scale(c('roll_medicar', 'roll_rpl_aca', 'roll_minwage'))

cces2016$state <- cces2016$inputstate
##### => indices for descriptive work

cces2016$pid <- cces2016$pid3clean

# frequency of mis-classifications across issues
# Row indices of Democrats (pid == -1) and Republicans (pid == 1).
id <- which(cces2016$pid == -1)
ir <- which(cces2016$pid == 1)

libcon_names <- paste0('libcon_', c('envs', 'crme', 'abrt', 'gays', 'immi',
                                    'trad', 'guns', 'dfns', 'taxs', 'need'))

# Number of the ten indices on which each of `rows` falls on one side of zero;
# `side` is `>` or `<`. A missing index makes the whole count NA.
count_side <- function(rows, side) {
  per_issue <- lapply(libcon_names, function(nm) as.numeric(side(cces2016[[nm]][rows], 0)))
  Reduce(`+`, per_issue)
}

# xmu: count of out-party positions (index > 0 for Democrats, < 0 for Republicans)
xmu <- c(count_side(id, `>`), count_side(ir, `<`))
# ymu: count of in-party positions (index < 0 for Democrats, > 0 for Republicans)
ymu <- c(count_side(id, `<`), count_side(ir, `>`))

# label each count with its row index in cces2016 (Democrats first, then Republicans)
names(xmu) <- c(id, ir)
names(ymu) <- c(id, ir)

# kinder and kalmoe + identify the 20-30% who score highest on ideological knowledge/consistency

# Numeric code for a party answer: 1 = 'Republican', 0 = any label containing
# 'Independent', -1 = 'Democrat', -2 = everything else (including NA).
# The class '[0,1]' matches a '0', a ',' or a '1', so the codes just written
# ("1", "0", "-1") are kept and every remaining label is set to -2.
party_answer_code <- function(answer) {
  code <- answer
  code[which(answer == 'Republican')] <- 1
  code[grep('Independent', answer)] <- 0
  code[which(answer == 'Democrat')] <- -1
  code[!grepl('[0,1]', code)] <- -2
  as.numeric(code)
}

k_gov  <- party_answer_code(cces2016$know_gov)
k_sen1 <- party_answer_code(cces2016$know_senate1)
k_sen2 <- party_answer_code(cces2016$know_senate2)

# Sign of the mean coded answer per state, using only respondents who named a
# party (code > -2): 1 = Republican, -1 = Democrat, 0 = exact balance.
state_answer_sign <- function(code) {
  named_party <- which(code > -2)
  sign(tapply(code[named_party], cces2016$state[named_party], mean, na.rm = TRUE))
}

sen2 <- state_answer_sign(k_sen2) # sanders and angus king
# Manual overrides for the second senator.
# NOTE(review): the Wyoming override is not explained in this file -- confirm.
sen2[which(names(sen2) == 'Wyoming')] <- 1
sen2[which(names(sen2) == 'Vermont')] <- 0
sen2[which(names(sen2) == 'Maine')] <- 0
sen1 <- state_answer_sign(k_sen1)
gov <- state_answer_sign(k_gov)

# Respondent-level "correct answer" labels for governor and both senators,
# looked up from the state-level signs (1 -> 'Republican',
# 0 -> 'Other Party / Independent', otherwise the default 'Democrat').
# District of Columbia is skipped and therefore keeps the default.
un_state <- unique(cces2016$state)

state_truth_labels <- function(state_sign) {
  truth <- array('Democrat', nrow(cces2016))
  for (st in un_state) {
    if (st != "District of Columbia") {
      rows <- which(cces2016$state == st)
      st_sign <- state_sign[which(names(state_sign) == st)]
      if (st_sign == 1) {
        truth[rows] <- 'Republican'
      }
      if (st_sign == 0) {
        truth[rows] <- 'Other Party / Independent'
      }
    }
  }
  truth
}

know_gov <- state_truth_labels(gov)
know_senate1 <- state_truth_labels(sen1)
know_senate2 <- state_truth_labels(sen2)


# knowledge battery: one 0/1 column per item, rows aligned with cces2016
X <- cbind(
  know_sen1           = as.numeric(cces2016$know_senate1 == know_senate1),        # know party of sen1
  know_sen2           = as.numeric(cces2016$know_senate2 == know_senate2),        # know party of sen2
  know_gov            = as.numeric(cces2016$know_gov == know_gov),                # know party of gov
  know_house_maj      = as.numeric(cces2016$us_house_majority == 'Republicans'),  # know house majority
  know_sen_maj        = as.numeric(cces2016$us_senate_majority == 'Republicans'), # know senate majority
  know_any_self_place = as.numeric(cces2016$self_place_attempt == 1),             # attempt self place
  know_D_left_R       = as.numeric(cces2016$dem_place < cces2016$rep_place)       # place dems left of reps
)

# issue items entering the reliability analyses
# (immi_policy4/5/6/8 are deliberately left out)
y_items <- c(
  paste0('envs_policy', 1:4),
  paste0('crme_policy', 1:4),
  paste0('abrt_policy', 1:6),
  'gays_policy1',
  paste0('immi_policy', c(1, 2, 3, 7)),
  'roll_tpp_act', 'roll_trd_adj',
  paste0('guns_policy', 1:4),
  'dfns_policy2', 'roll_usa_fre', 'roll_iransct',
  'roll_educrfr', 'roll_infrast', 'roll_medicar', 'roll_rpl_aca', 'roll_minwage'
)
Y <- cces2016[, y_items]

########################################################
########################################################
########################################################
# Descriptive Analyses and Figures Here
########################################################
########################################################
########################################################

#################################
#### plot density/hist of X correct + frequencies
# among partisans
# Knowledge battery restricted to partisans (Democrats first, then Republicans).
ii=c(id,ir)
Xo=X[ii,]
summary(Xo)

### POOLING OVER ALL D AND R
# Histogram of the number of knowledge items answered correctly.
pdf('appendix/figures/know_party_items_frequency_common.pdf')
hist(rowSums(Xo),xlab='Number of Correct Answers on 7 Party Knowledge Questions',main='',breaks=6)
dev.off()

# Order the items from most to least often correct.
i0=order(decreasing=TRUE,colMeans(Xo))
X0=Xo[,i0]
pdf('appendix/figures/know_party_items_cumulative_common.pdf')
# empty canvas: the single plotted point lies far outside the axis limits
plot(main='',x=-10000,y=-1000,ylim=c(0,1),xlim=c(1,7),axes=F,xlab='',ylab='Percent Correct')
for(j in seq_len(ncol(X0))){
	points(x=j,y=mean(X0[,j]),col='blue') # marginal: share correct on item j
	points(x=j,y=mean(X0[,1:j]),col='darkblue',pch=19) # cumulative: mean over items 1..j
	if(j > 1){
		lines(x=c(j,j-1),y=c(mean(X0[,1:j]),mean(X0[,1:(j-1)])),col='darkblue',lty=1)
		lines(x=c(j,j-1),y=c(mean(X0[,j]),mean(X0[,(j-1)])),col='blue',lty=2)
	}
}
axis(2)
axis(1,at=c(1,2,3,4,5,6,7),labels=FALSE)
# Legend entries follow the plotted colours: cumulative is drawn in darkblue
# (solid), marginal in blue (dashed). The labels were previously swapped.
legend(x=1,y=.2,legend=c('Cumulative Correct','Marginal Correct'),fill=c('darkblue','blue'),density=c(100,40),border='white')
# Axis labels are looked up by column name so they always follow the sort
# order in i0 instead of assuming a fixed ranking of the items.
item_labels=c(know_any_self_place='Self Place       ',
	know_D_left_R='Know D < R       ',
	know_gov='Know Gov. Pty.   ',
	know_sen2='Know Sen. II Pty.',
	know_sen1='Know Sen. I Pty. ',
	know_house_maj='Know Hou. Maj.   ',
	know_sen_maj='Know Sen. Maj.  ')
lablist=unname(item_labels[colnames(X0)])
text(.25+seq(1, 7, by=1), par("usr")[3] - 0.085, labels = lablist, srt = 310, pos = 1, xpd = TRUE,cex=.8)
dev.off()


#################################
#### plot density/hist of
### POOLING OVER ALL D AND R
# Histograms of party-inconsistent (xmu) and party-consistent (ymu) issue
# positions among partisans, pooled and split by knowledge score.

# Write one histogram to `path`. on.exit() closes the PDF device even when
# hist() fails (e.g. on an empty subset), so no device is left open.
save_hist <- function(path, values, xlab) {
	pdf(path)
	on.exit(dev.off(), add = TRUE)
	hist(values, xlab = xlab, main = '', breaks = 8)
	invisible(NULL)
}

lab_incons <- 'Number of Party-Inconsistent Attitudes on 10 Issues'
lab_cons <- 'Number of Party-Consistent Attitudes on 10 Issues'

# knowledge score per partisan, computed once (rows align with xmu / ymu)
know_score <- rowSums(Xo)

### POOLING OVER ALL D AND R
# frequency of cross/out-party positions
save_hist('appendix/figures/libcon_issue_inconsistency_common.pdf', xmu, lab_incons)
# frequency of in-party positions
save_hist('appendix/figures/libcon_issue_consistency_common.pdf', ymu, lab_cons)

### POOLING OVER ALL D AND R; HIGH and LOW KNOWLEDGE
high_know <- which(know_score >= 6)
low_know <- which(know_score < 6)
save_hist('appendix/figures/libcon_issue_inconsistency_high_know_common.pdf', xmu[high_know], lab_incons)
save_hist('appendix/figures/libcon_issue_consistency_high_know_common.pdf', ymu[high_know], lab_cons)
save_hist('appendix/figures/libcon_issue_inconsistency_low_know_common.pdf', xmu[low_know], lab_incons)
save_hist('appendix/figures/libcon_issue_consistency_low_know_common.pdf', ymu[low_know], lab_cons)

# One pair of figures per knowledge level k, pooling scores k and k-1.
for(k in 1:7){
	at_level <- which(know_score == k | know_score == k - 1)
	# file name suffix corrected from '_commmpn.pdf' to match the '_common.pdf' siblings
	save_hist(paste0('appendix/figures/libcon_issue_inconsistency_know_', k, '_common.pdf'), xmu[at_level], lab_incons)
	save_hist(paste0('appendix/figures/libcon_issue_consistency_know_', k, '_common.pdf'), ymu[at_level], lab_cons)
}


################################################################################
################################################################################
# inter item correlations and reliability benchmarks
################################################################################
################################################################################

# avg inter-item corr across the issue items in Y (33 columns as selected here)
library(psych)

# Absolute correlation for every unordered pair of items, each computed on the
# rows where both items are observed. combn() lists pairs as (1,2), (1,3), ...
item_pairs <- combn(ncol(Y), 2)
x.corr <- apply(item_pairs, 2, function(pair) {
	abs(cor(Y[, pair[1]], Y[, pair[2]], use = 'complete.obs'))
})
n0 <- ncol(item_pairs) # number of item pairs

cronA <- alpha(Y, check.keys = TRUE)
# recorded output:
#raw_alpha std.alpha G6(smc) average_r S/N     ase mean  sd median_r
#		0.89      0.88     0.9      0.17 7.5 0.00061 0.38 0.2     0.15
# cronbach alpha is .88 => items are mapping same dimension reliably  | evidence of internal consistency w/n sample
rho <- mean(x.corr)
# recorded output: [1] 0.1689285
# average inter-item correlation is about .17; strikes me as rather high, but relevant to benchmark

################################################################################
# BENCHMARK alpha and rho by high and low knowledge
################################################################################
# fix on most knowledgeable
# A7[k] / C7[k]: Cronbach's alpha and mean absolute inter-item correlation of
# the issue items among respondents with exactly k-1 correct knowledge answers.
C7 <- array(NA, 8)
A7 <- rep(NA, 8)

# most knowledgeable (all 7 correct) versus everyone else
iq <- which(rowSums(X) == 7)
cronA_iq <- alpha(Y[iq, ], check.keys = TRUE)
cronA_niq <- alpha(Y[-c(iq), ], check.keys = TRUE)

#(cronA_iq$total)[1]
#(cronA_niq$total)[1]
item_pairs <- combn(ncol(Y), 2)
for (k in 1:8) {
	iq <- which(rowSums(X) == (k - 1))
	# first entry of $total is raw_alpha
	A7[k] <- alpha(Y[iq, ], check.keys = TRUE)$total[[1]]

	# absolute correlation of every item pair within this knowledge stratum
	x.corr <- apply(item_pairs, 2, function(pair) {
		abs(cor(Y[iq, pair[1]], Y[iq, pair[2]], use = 'complete.obs'))
	})
	n0 <- ncol(item_pairs)
	C7[k] <- mean(abs(x.corr))
}

# Recorded outputs from an earlier run. The two ratio lines are matched to the
# quantities they equal (e.g. 0.7008052 / 0.9319513 = 0.7519762).
A7 #0.7008052 0.6568972 0.7052517 0.7433666 0.7711684 0.8271232 0.8903477 0.9319513
C7 # 0.10942065 0.08543354 0.08814400 0.09415325 0.09988723 0.12404456 0.17816766 0.26201474
A7/A7[8]
#[1] 0.7519762 0.7048621 0.7567473 0.7976453 0.8274771 0.8875176 0.9553586 1.0000000
C7/C7[8]
#[1] 0.4176126 0.3260639 0.3364086 0.3593433 0.3812275 0.4734259 0.6799910 1.0000000
# - stratifying on least knowledgeable, gets reliability (alpha) at about 75% and
# mean inter-item correlation at about 40% of that from stratifying on most knowledgeable
# half-full/empty, but tells an important point that (in)consistency here isn't just garbage data w/ error

# inter-item correlations w/n issue areas
# Item sets per issue area. 'taxs' and 'need' are the five- and three-item
# roll-call sets used for libcon_taxs and libcon_need. 'gays' has a single
# item, listed twice so that cor() returns a (trivial) 2x2 matrix.
issue_sets <- list(
	envs = c("envs_policy1","envs_policy2","envs_policy3","envs_policy4"),
	crme = c("crme_policy1","crme_policy2","crme_policy3","crme_policy4"),
	abrt = c("abrt_policy1","abrt_policy2","abrt_policy3","abrt_policy4","abrt_policy5","abrt_policy6"),
	gays = c("gays_policy1","gays_policy1"),
	immi = c("immi_policy1","immi_policy2","immi_policy3","immi_policy7"), # immi_policy4/5/6/8 left out
	trad = c("roll_tpp_act","roll_trd_adj"),
	guns = c("guns_policy1","guns_policy2","guns_policy3","guns_policy4"),
	dfns = c("dfns_policy2","roll_usa_fre","roll_iransct"),
	taxs = c("roll_educrfr","roll_infrast","roll_medicar","roll_rpl_aca","roll_minwage"),
	need = c("roll_medicar","roll_rpl_aca","roll_minwage")
)

# Correlation matrix of one item set. Several items (e.g. roll_trd_adj,
# roll_usa_fre) are set to NA for respondents who were not asked; with cor()'s
# default handling a single NA turns every correlation involving that item
# into NA. Pairwise deletion matches the per-pair 'complete.obs' correlations
# used for the pooled inter-item benchmark, and gives the same values as
# before wherever no item is missing.
within_issue_cor <- function(items) {
	cor(cces2016[, items], use = 'pairwise.complete.obs')
}

# Mean absolute off-diagonal correlation within one item set.
mean_abs_within <- function(items) {
	r <- within_issue_cor(items)
	mean(abs(r[lower.tri(r, diag = FALSE)]))
}

# full matrices, one per issue area
lapply(issue_sets, within_issue_cor)

# mean absolute within-area correlation (single-item 'gays' area excluded)
vapply(issue_sets[names(issue_sets) != 'gays'], mean_abs_within, numeric(1))

mean(c(mean(abs(cor(cces2016[,c("envs_policy1","envs_policy2","envs_policy3","envs_policy4")])[lower.tri(x=cor(cces2016[,c("envs_policy1","envs_policy2","envs_policy3","envs_policy4")]), diag = FALSE)])),
  mean(abs(cor(cces2016[,c("crme_policy1","crme_policy2","crme_policy3","crme_policy4")])[lower.tri(x=cor(cces2016[,c("crme_policy1","crme_policy2","crme_policy3","crme_policy4")]), diag = FALSE)])),
  mean(abs(cor(cces2016[,c("abrt_policy1","abrt_policy2","abrt_policy3","abrt_policy4","abrt_policy5","abrt_policy6")])[lower.tri(x=cor(cces2016[,c("abrt_policy1","abrt_policy2","abrt_policy3","abrt_policy4","abrt_policy5","abrt_policy6")]), diag = FALSE)])),
#cor(cces2016[,c("gays_policy1","gays_policy1")])[lower.tri(x=cor(cces2016[,c()]), diag = FALSE)],
  mean(abs(cor(cces2016[,c("immi_policy1","immi_policy2","immi_policy3","immi_policy7")])[lower.tri(x=cor(cces2016[,c("immi_policy1","immi_policy2","immi_policy3","immi_policy7")]), diag = FALSE)])),
  mean(abs(cor(cces2016[,c("roll_tpp_act","roll_trd_adj")])[lower.tri(x=cor(cces2016[,c("roll_tpp_act","roll_trd_adj")]), diag = FALSE)])),
  mean(abs(cor(cces2016[,c("guns_policy1","guns_policy2","guns_policy3","guns_policy4")])[lower.tri(x=cor(cces2016[,c("guns_policy1","guns_policy2","guns_policy3","guns_policy4")]), diag = FALSE)])),
  mean(abs(cor(cces2016[,c("dfns_policy2","roll_usa_fre","roll_iransct")])[lower.tri(x=cor(cces2016[,c("dfns_policy2","roll_usa_fre","roll_iransct")]), diag = FALSE)])),
  mean(abs(cor(cces2016[,c("roll_educrfr","roll_infrast","roll_medicar","roll_rpl_aca","roll_minwage")])[lower.tri(x=cor(cces2016[,c("roll_educrfr","roll_infrast","roll_medicar","roll_rpl_aca","roll_minwage")]), diag = FALSE)])),
  mean(abs(cor(cces2016[,c("roll_medicar","roll_rpl_aca","roll_minwage")])[lower.tri(x=cor(cces2016[,c("roll_medicar","roll_rpl_aca","roll_minwage")]), diag = FALSE)]))
)
)
#  0.2176849 mean inter-summary correlation

################################################################################
################################################################################
# above is summary
# below is getting to analysis =>
#		step 1: classify empirical proportion of (un)sophisticates from our measures
#   step 2: produce stratifications on our scaled attitudes, natural knowledge scales
#   	=> X, scaled(Y), X by scaled(Y)
################################################################################
################################################################################

# K&K as useful guide

#########################################
#1. Use the self-placement scale
# Shares of the c(id, ir) rows whose absolute self-placement is exactly 3,
# at least 2, and at least 1; missing placements are dropped from each mean.
self_place_abs <- abs(cces2016$self_place[c(id, ir)])
mean(self_place_abs == 3, na.rm = TRUE)
#[1] 0.2414475
mean(self_place_abs >= 2, na.rm = TRUE)
#[1] 0.5501285
mean(self_place_abs >= 1, na.rm = TRUE)
#[1] 0.7741744

#########################################
#2. Knowledge
# Distribution of the row sums of Xo (number of items correct), as proportions.
prop.table(table(rowSums(Xo)))
#0           1           2           3           4           5           6           7
#0.009689539 0.048447696 0.062487641 0.082855448 0.099861578 0.126952739 0.169863555 0.399841803
# 57% 6 or 7 correct; 40%  get all 7

#########################################
#3. Policy (in)consistency on 10 issue areas
# Relative frequencies of the inconsistent (xmu) and consistent (ymu) counts.
prop.table(table(xmu))
prop.table(table(ymu))

# Share with exactly 10 consistent, then with at least 9, 8, ..., 1 consistent.
c(mean(ymu == 10),
  vapply(9:1, function(k) mean(ymu >= k), numeric(1)))
#0.02669567 # all 10 consistent
#0.17124778 # 9 or 10
#0.36444532 # 8, 9, or 10
#0.52778327 # 7, 8, 9, or 10

# Share with exactly 0 inconsistent, then with at most 1, 2, ..., 9 inconsistent.
c(mean(xmu == 0),
  vapply(1:9, function(k) mean(xmu <= k), numeric(1)))
#0.1465296 # 0 inconsistent positions
#0.4421594 # 1 or fewer inconsistent

#########################################
#4. Stability; aggregated estimates using pre/post

#########################################
#5. Interaction: take the top scorers on both knowledge and consistency
#c(id,ir)[which(xmu<=0)] #15%
#c(id,ir)[which(rowSums(Xo)>=7)] #40%
#c(id,ir)[which(xmu<=0&rowSums(Xo)>=7)] #9%

#c(id,ir)[which(xmu<=1)] #44%
#c(id,ir)[which(rowSums(Xo)>=6)] #57%
#c(id,ir)[which(xmu<=1 & rowSums(Xo)>=6)] #33%

# reasonable enough, though can also use scaling below, to get to 20 to 30% ....
###rnk=(rank(thetaK[c(id,ir)])+rank(abs(thetaZ)[c(id,ir)]))/2
#quantile(rnk,prob=c(.7,.8))
#c(id,ir)[which(rnk>=3411.5)] #30% thresh
#c(id,ir)[which(rnk>=3788.4)] #20% thresh

#included1=array(0,length(rnk))
#included2=array(0,length(rnk))

###H_included1 = rnk>=quantile(rnk,prob=c(.7,.8))[1]
# Count-based sophistication strata; each logical vector is aligned with c(id, ir).
# (The rank-based *_included1 alternatives remain disabled.)
know_correct <- rowSums(Xo)

# High: at most 1 inconsistent issue area and at least 6 knowledge items correct.
H_included2 <- (xmu <= 1) & (know_correct >= 6)

# included sophisticateds / excluded non-sophisticateds
#c(id,ir)[H_included1]
#c(id,ir)[H_included2]
#c(id,ir)[!H_included1]
#c(id,ir)[!H_included2]

#quantile(rnk,prob=c(.2,.3))
####L_included1 = rnk<=quantile(rnk,prob=c(.2,.3))[2]
# Low: 2 or more inconsistent issue areas and at most 5 knowledge items correct.
L_included2 <- (xmu >= 2) & (know_correct <= 5)

# excluded least-sophisticateds
#c(id,ir)[L_included1]
#c(id,ir)[L_included2]

#M_included1 = !H_included1 & !L_included1
# Middle: everyone who is in neither the high nor the low stratum.
M_included2 <- !(H_included2 | L_included2)

#cces2016$respondent[c(id,ir)]
#cces2016$pid3[c(id,ir)]

# Collect the raw (in)consistency counts together with the sorted elements of
# c(id, ir) that fall into each knowledge / consistency / sophistication group.
resp_rows <- c(id, ir)
know_total <- rowSums(Xo)
# Sorted subset of resp_rows selected by `pick` (integer positions or logical mask).
sorted_rows <- function(pick) sort(resp_rows[pick])

indices <- list(
#	'resp_id'=cces2016$respondent,
  num_inconsistent = xmu,
  num_consistent   = ymu,
  know7      = sorted_rows(which(know_total >= 7)),
  know67     = sorted_rows(which(know_total >= 6)),
  know0to5   = sorted_rows(which(know_total <= 5)),
  know0to2   = sorted_rows(which(know_total <= 2)),
  incon01    = sorted_rows(which(xmu <= 1)),
  incon2to10 = sorted_rows(which(xmu >= 2)),
  incon4to10 = sorted_rows(which(xmu >= 4)),
  # rank-based H_ix1 / M_ix1 / L_ix1 entries are disabled
  #'H_ix1'=sort(c(id,ir)[H_included1]),
  H_ix2      = sorted_rows(H_included2),
  #'M_ix1'=sort(c(id,ir)[M_included1]),
  M_ix2      = sorted_rows(M_included2),
  #'L_ix1'=sort(c(id,ir)[L_included1]),
  L_ix2      = sorted_rows(L_included2)
)

# 0/1 flag: 1 for the rows listed in indices$know7, 0 for every other row.
cces2016$know7 <- replace(numeric(nrow(cces2016)), indices$know7, 1)

# Sophistication label per row: 'H', 'M' or 'L' for rows listed in the matching
# index vector, NA for all remaining rows. Levels are written in H, M, L order.
soph_level <- rep(NA, nrow(cces2016))
for (lvl in c('H', 'M', 'L')) {
  soph_level[indices[[paste0(lvl, '_ix2')]]] <- lvl
}
cces2016$H_M_L_sophisticated <- soph_level


# Per-state shares of 'H', 'M' and 'L' respondents among the rows where
# `valid_flag` equals 1. Returns a matrix with one row per state and three
# unnamed columns in H, M, L order; rows with a missing label are ignored.
state_soph_shares <- function(valid_flag) {
  flagged <- which(valid_flag == 1)
  level <- cces2016$H_M_L_sophisticated[flagged]
  state <- cces2016$state[flagged]
  cbind(
    tapply(level == 'H', state, mean, na.rm = TRUE),
    tapply(level == 'M', state, mean, na.rm = TRUE),
    tapply(level == 'L', state, mean, na.rm = TRUE)
  )
}

stateSophisticatedPrimary <- state_soph_shares(cces2016$valid_primary)
stateSophisticatedGeneral <- state_soph_shares(cces2016$valid_general)

# Put the general-election rows in the same state order as the primary rows.
stateSophisticatedGeneral <- t(
  t(stateSophisticatedGeneral)[, c(rownames(stateSophisticatedPrimary))]
)

# Keep only states whose primary 'H' share is defined and strictly between 0 and 1.
primary_high <- stateSophisticatedPrimary[, 1]
usable_states <- which(primary_high != 0 & primary_high != 1 & !is.nan(primary_high))
stateSophisticatedGeneral <- stateSophisticatedGeneral[usable_states, ]
stateSophisticatedPrimary <- stateSophisticatedPrimary[usable_states, ]
#length(indices$'H_ix2')

# Sort both matrices by the primary 'H' share, ascending.
state_order <- order(stateSophisticatedPrimary[, 1])
stateSophisticatedGeneral <- stateSophisticatedGeneral[state_order, ]
stateSophisticatedPrimary <- stateSophisticatedPrimary[state_order, ]


# Draw one state-level sophistication-gap figure and write it to a PDF.
#
#   shares  : matrix with one row per state (row names become the x-axis labels);
#             columns 1, 2, 3 hold the shares of 'H', 'M' and 'L' respondents.
#   file    : path of the PDF to create.
#   ylim    : y-axis limits.
#   fewer_y : y position of the 'Fewer Sophisticates' annotation.
#
# For every state the plotted value is (column 2 + column 3) - column 1.
# NOTE(review): the axis label reads 'Low Minus High' although the value is the
# 'M' + 'L' share minus the 'H' share -- confirm the intended wording.
plot_sophistication_gap <- function(shares, file, ylim, fewer_y) {
  pdf(file, height = 5, width = 6)
  # Close the device even when a plotting call fails, so that an error does not
  # leave it open and capture later plots.
  on.exit(dev.off(), add = TRUE)

  state_pos <- seq_len(nrow(shares))
  non_high <- shares[, 2] + shares[, 3]
  high <- shares[, 1]
  gap <- non_high - high

  # Empty canvas: the single point at (1000, 1000) lies outside the plot region.
  plot(cex = .65, x = 1000, y = 1000, ylim = ylim, xlim = c(1, nrow(shares)),
       ylab = 'Difference in Low Minus High Sophisticated', xlab = 'State',
       axes = FALSE)
  axis(1, at = state_pos, labels = FALSE)
  # Rotated state labels placed just below the x-axis.
  text(state_pos, par("usr")[3] - 0.055, labels = rownames(shares),
       srt = 295, pos = 1, xpd = TRUE, cex = .5)

  # One vertical bar per state from 0 to the gap, coloured by the gap's sign.
  for (i in state_pos) {
    bar_col <- if (non_high[i] > high[i]) 'dodgerblue2' else 'darkblue'
    lines(x = c(i, i), y = c(0, gap[i]), col = bar_col, lwd = 3)
  }
  points(gap, col = 'blue')
  axis(2)
  abline(h = 0, col = 'grey', lty = 2)
  text(x = 17, y = fewer_y, labels = 'Fewer Sophisticates', col = 'dodgerblue2', cex = .85)
  text(x = 27, y = -.2, labels = 'More Sophisticates', col = 'darkblue', cex = .85)
  invisible(NULL)
}

plot_sophistication_gap(
  stateSophisticatedPrimary,
  'appendix/figures/sophistication_levels_validated_primary.pdf',
  ylim = c(-.3, .3), fewer_y = .2
)

plot_sophistication_gap(
  stateSophisticatedGeneral,
  'appendix/figures/sophistication_levels_validated_general.pdf',
  ylim = c(-.3, .4), fewer_y = .375
)
#save(indices,
#	file='appendix/data/sophistication_indices.Rdata'
#)


#END
